import os
import sys
import tempfile
import time
import shutil
import uuid
from pathlib import Path
import pandas as pd
from moviepy.editor import VideoFileClip
from external.oss_utils import OssManager
from external.aliyun_asr import AliyunASR
from tools.word_dataset_generator import generate_word_dataset
from service.video_processor import VideoProcessor
import streamlit as st


# Encodings tried in order when reading a subtitle file. latin-1 accepts any
# byte sequence, so it acts as the last-resort fallback.
_SUBTITLE_ENCODINGS = ('utf-8', 'gbk', 'gb2312', 'big5', 'latin-1')

# Duration (ms) given to the final LRC line, which has no following timestamp.
_LRC_LAST_LINE_MS = 5000


def _read_subtitle_text(subtitle_path):
    """Return the decoded text of ``subtitle_path``, or None if no encoding fits."""
    for encoding in _SUBTITLE_ENCODINGS:
        try:
            with open(subtitle_path, 'r', encoding=encoding) as f:
                content = f.read()
        except UnicodeDecodeError:
            continue
        print(f"成功使用 {encoding} 编码打开字幕文件")
        # A UTF-8 BOM decodes to U+FEFF; left in place it sticks to the first
        # index/timestamp and makes the first cue unparseable.
        return content.lstrip('\ufeff')
    return None


def _lrc_tag_to_ms(tag):
    """Convert an LRC time tag body (``mm:ss.xx``, ``mm:ss``, ``ss.xx``) to ms.

    Returns None when ``tag`` is not a time tag (for example ``ti:Title``).
    """
    whole, _, fraction = tag.partition('.')
    if ':' in whole:
        pieces = whole.split(':')
        if len(pieces) != 2:
            return None
        minutes, seconds = pieces
    else:
        minutes, seconds = '0', whole

    minutes, seconds, fraction = minutes.strip(), seconds.strip(), fraction.strip()
    if not (minutes.isdecimal() and seconds.isdecimal()):
        return None
    if fraction and not fraction.isdecimal():
        return None

    # The fraction is padded/truncated to exactly three digits (milliseconds).
    millis = int(fraction.ljust(3, '0')[:3])
    return int(minutes) * 60 * 1000 + int(seconds) * 1000 + millis


def _format_srt_time(total_ms):
    """Format a millisecond offset as an SRT time string ``HH:MM:SS,mmm``."""
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"


def _parse_lrc_entries(text):
    """Parse LRC text into ``timestamp``/``sentence`` rows ordered by start time."""
    timed_lines = []
    for raw_line in text.split('\n'):
        rest = raw_line.strip()
        starts = []
        # One lyric line may carry several leading tags:
        # [00:01.00][00:10.00]text
        while rest.startswith('['):
            close = rest.find(']')
            if close == -1:
                break
            tag_ms = _lrc_tag_to_ms(rest[1:close])
            if tag_ms is None:
                # Metadata tag or bracketed lyric text, not a time tag.
                break
            starts.append(tag_ms)
            rest = rest[close + 1:].lstrip()
        for tag_ms in starts:
            timed_lines.append((tag_ms, rest))

    timed_lines.sort(key=lambda item: item[0])

    rows = []
    for idx, (start_ms, sentence) in enumerate(timed_lines):
        # A line ends where the next one starts; the last one gets a default.
        if idx + 1 < len(timed_lines):
            end_ms = timed_lines[idx + 1][0]
        else:
            end_ms = start_ms + _LRC_LAST_LINE_MS
        rows.append({
            'timestamp': f"{_format_srt_time(start_ms)}-->{_format_srt_time(end_ms)}",
            'sentence': sentence,
        })
    return rows


def _parse_srt_entries(text):
    """Parse SRT-style text into ``timestamp``/``sentence`` rows in file order."""
    lines = [line.strip() for line in text.split('\n')]
    total = len(lines)

    def starts_cue(pos):
        # A numeric line is a cue index only when a timestamp line follows it;
        # otherwise it is ordinary subtitle text that happens to be a number.
        return lines[pos].isdigit() and pos + 1 < total and '-->' in lines[pos + 1]

    rows = []
    pos = 0
    while pos < total:
        if not starts_cue(pos):
            pos += 1
            continue

        timestamp = lines[pos + 1]
        pos += 2

        # Text runs until a blank line, a timestamp line or the next cue.
        text_lines = []
        while pos < total and lines[pos] and '-->' not in lines[pos] and not starts_cue(pos):
            text_lines.append(lines[pos])
            pos += 1

        rows.append({'timestamp': timestamp, 'sentence': " ".join(text_lines)})
    return rows


def convert_subtitle_to_dataframe(subtitle_path):
    """Convert a subtitle file into a DataFrame without writing a CSV.

    Files ending in ``.lrc`` are parsed as LRC; each line's end time is the
    start of the next line (the last line lasts 5 seconds) and timestamps are
    rendered as ``HH:MM:SS,mmm-->HH:MM:SS,mmm``. Every other extension is
    parsed as SRT-style cues (index line, timestamp line, text lines) and the
    timestamp line is kept verbatim.

    Args:
        subtitle_path: Path of the subtitle file.

    Returns:
        pandas.DataFrame with the columns ``timestamp`` and ``sentence``
        (possibly empty), or None when the file cannot be read or parsed.
    """
    try:
        subtitle_content = _read_subtitle_text(subtitle_path)
        if subtitle_content is None:
            print("未知编码方式的字幕文件")
            return None

        file_ext = os.path.splitext(subtitle_path)[1].lower()
        if file_ext == '.lrc':
            rows = _parse_lrc_entries(subtitle_content)
        else:
            rows = _parse_srt_entries(subtitle_content)

        return pd.DataFrame(rows, columns=['timestamp', 'sentence'])
    except Exception as e:
        # Callers rely on None (not an exception) to detect failure.
        print(f"字幕文件转换过程中出现错误: {e}")
        return None


class UIProcessor:
    """UI处理器类，负责处理界面与业务逻辑之间的交互"""

    def __init__(self, project_root, output_dir):
        """Create a UI processor bound to a project root and an output directory.

        Args:
            project_root: Project root directory.
            output_dir: Output directory.
        """
        self.project_root, self.output_dir = project_root, output_dir
        # Collaborators are created eagerly: the video processor first, then
        # the OSS manager.
        self.video_processor = VideoProcessor()
        self.oss_manager = OssManager()

    def get_ip_options(self, output_dir):
        """List the IP dataset folders available under ``output_dir``.

        Args:
            output_dir: Directory whose immediate sub-directories are scanned.

        Returns:
            list: ``"none"`` (meaning "create a new IP dataset") followed by
            the names of the sub-directories that start with ``"IP"``, sorted
            so the order is stable between calls. When ``output_dir`` does
            not exist the result is just ``["none"]``.
        """
        # Nothing has been generated yet: offer only the "create new" entry
        # instead of raising FileNotFoundError.
        if not os.path.isdir(output_dir):
            return ["none"]

        # os.listdir returns entries in arbitrary order, hence the sort.
        ip_folders = sorted(
            name for name in os.listdir(output_dir)
            if name.startswith("IP") and os.path.isdir(os.path.join(output_dir, name))
        )
        return ["none"] + ip_folders

    def get_ip_metadata_options(self, project_root, include_empty=False):
        """Build the IP choices listed in the metadata CSV.

        The metadata file is read from ``<project_root>/output/ip_metadata.csv``.
        A row is offered only when a folder named after its ``ip_id`` exists
        under ``self.output_dir``.

        Args:
            project_root: Project root directory.
            include_empty: Accepted for interface compatibility; it currently
                has no effect on the result.

        Returns:
            tuple: ``(display_names, options)`` where ``options`` is a list of
            ``{"id": ..., "name": ...}`` dicts and ``display_names`` holds the
            ``name`` of each option in the same order. Both are empty when the
            metadata file is missing or unreadable.
        """
        ip_metadata_path = os.path.join(project_root, "output", "ip_metadata.csv")
        ip_options = []

        if os.path.exists(ip_metadata_path):
            try:
                ip_df = pd.read_csv(ip_metadata_path)
                for ip_id, ip_name in zip(ip_df['ip_id'], ip_df['ip_name']):
                    # A blank ip_id is read as NaN; skip that row rather than
                    # letting os.path.join fail and discard the rows after it.
                    if not isinstance(ip_id, str):
                        continue

                    if isinstance(ip_name, str):
                        ip_name = ip_name.strip()

                    # Only offer IPs whose dataset folder actually exists.
                    if os.path.isdir(os.path.join(self.output_dir, ip_id)):
                        ip_options.append({"id": ip_id, "name": ip_name})
            except Exception as e:
                # Best effort: a broken metadata file yields whatever was
                # collected so far instead of breaking the UI.
                print(f"读取IP元数据文件出错: {e}")

        ip_display_names = [opt["name"] for opt in ip_options]

        return ip_display_names, ip_options

    def get_selected_ip_id(self, ip_options, selected_display):
        """Look up the IP ID that belongs to a display name.

        Args:
            ip_options: List of ``{"id": ..., "name": ...}`` option dicts.
            selected_display: Display name chosen by the user.

        Returns:
            str: The ``id`` of the first option whose ``name`` equals
            ``selected_display``, or ``"none"`` when no option matches.
        """
        matching_ids = (
            option["id"] for option in ip_options if option["name"] == selected_display
        )
        # First match wins; "none" signals that nothing carries that name.
        return next(matching_ids, "none")

    def get_ip_name_by_id(self, ip_id):
        """Return the IP name recorded for ``ip_id``.

        The name is read from ``ip_metadata.csv`` inside ``self.output_dir``.

        Args:
            ip_id: IP ID to look up.

        Returns:
            The ``ip_name`` of the first row whose ``ip_id`` matches; ``ip_id``
            itself when the file is missing, unreadable or has no such row.
        """
        ip_metadata_path = os.path.join(self.output_dir, "ip_metadata.csv")
        if not os.path.exists(ip_metadata_path):
            return ip_id

        try:
            ip_df = pd.read_csv(ip_metadata_path)
            names = ip_df.loc[ip_df['ip_id'] == ip_id, 'ip_name']
            if not names.empty:
                return names.iloc[0]
        except Exception as e:
            # Fall back to the ID, but report the problem instead of hiding
            # it; unlike a bare except this lets KeyboardInterrupt through.
            print(f"读取IP元数据文件出错: {e}")
        return ip_id

    def process_video(self, video_path, output_dir, logger, progress_placeholder, status_placeholder,
                      result_placeholder):
        """Generate an LRC subtitle file for a video and copy the video to the output directory.

        While this method runs, ``sys.stdout`` is replaced by ``logger`` so
        that ``print`` output reaches the UI; it is restored on exit.

        Args:
            video_path: Path of the source video file.
            output_dir: Output directory. When None, a folder named
                ``LRC_<video stem>`` next to the source video is used.
            logger: File-like object that temporarily replaces ``sys.stdout``.
            progress_placeholder: Progress bar placeholder.
            status_placeholder: Status message placeholder.
            result_placeholder: Result display placeholder.

        Returns:
            bool: True when processing succeeded, False when any step failed
            or an exception was raised.
        """
        # Redirect standard output to the UI logger.
        original_stdout = sys.stdout
        sys.stdout = logger

        try:
            # Read the duration, closing the clip even if that fails.
            video_clip = VideoFileClip(video_path)
            try:
                video_duration = video_clip.duration
            finally:
                video_clip.close()

            video_path_obj = Path(video_path)

            if output_dir is None:
                output_dir = video_path_obj.parent / f"LRC_{video_path_obj.stem}"

            output_dir = Path(output_dir)
            os.makedirs(output_dir, exist_ok=True)

            # All outputs share the stem of the source video.
            audio_path = output_dir / f"{video_path_obj.stem}.mp3"
            lrc_path = output_dir / f"{video_path_obj.stem}.lrc"
            csv_path = output_dir / f"{video_path_obj.stem}.csv"
            video_path_output = output_dir / f"{video_path_obj.stem}.mp4"

            print("复制视频文件到输出目录...")
            # shutil.copy2 raises SameFileError when the source already is the
            # destination file, so skip the copy in that case.
            already_in_place = video_path_output.exists() and os.path.samefile(
                video_path, video_path_output)
            if not already_in_place:
                shutil.copy2(video_path, str(video_path_output))
            print(f"视频文件已保存到: {video_path_output}")

            # Propagate the helper's result so a failed step is not reported
            # as success.
            if video_duration > 7200:  # 2 hours = 7200 seconds
                return self._process_long_video(
                    video_path,
                    output_dir,
                    video_path_obj,
                    lrc_path,
                    video_path_output,
                    progress_placeholder,
                    status_placeholder,
                    result_placeholder
                )

            return self._process_short_video(
                video_path,
                output_dir,
                video_path_obj,
                audio_path,
                csv_path,
                lrc_path,
                video_path_output,
                progress_placeholder,
                status_placeholder,
                result_placeholder
            )
        except Exception as e:
            print(f"视频处理过程中出现错误: {e}")
            status_placeholder.error(f"处理错误: {e}")
            return False
        finally:
            # Restore standard output.
            sys.stdout = original_stdout

    def _process_long_video(self, video_path, output_dir, video_path_obj, lrc_path, video_path_output,
                            progress_placeholder, status_placeholder, result_placeholder):
        """Process a long video by splitting it and merging the per-segment results.

        The video is split into segments of at most 3600 seconds under
        ``<output_dir>/segments``. Each segment is processed on its own; the
        CSV and LRC files of the segments that succeeded are then merged into
        ``<video stem>.csv`` and ``<video stem>.lrc`` in ``output_dir``.

        Args:
            video_path: Path of the source video file.
            output_dir: Output directory (a ``Path``).
            video_path_obj: ``Path`` object of the source video.
            lrc_path: LRC file path shown in the result messages.
            video_path_output: Path of the copied video shown in the result messages.
            progress_placeholder: Progress bar placeholder.
            status_placeholder: Status message placeholder.
            result_placeholder: Result display placeholder.

        Returns:
            bool: True on success; False when splitting fails, no segment
            succeeds, or a merge step fails.
        """
        print("视频时长超过2小时，将进行切割处理")
        progress_placeholder.progress(0.05)
        status_placeholder.info("正在切割视频...")

        # Segments live in their own sub-directory of the output folder.
        segments_dir = output_dir / "segments"
        os.makedirs(segments_dir, exist_ok=True)

        segment_paths = self.video_processor.split_video(
            video_path, str(segments_dir), max_duration=3600)  # 1 hour = 3600 seconds
        if not segment_paths:
            status_placeholder.error("视频切割失败")
            return False

        total = len(segment_paths)
        finished_csvs = []
        finished_lrcs = []

        for index, segment_path in enumerate(segment_paths):
            stem = Path(segment_path).stem
            ordinal = index + 1
            print(f"\n处理视频片段 {ordinal}/{total}: {stem}")

            # Segment work covers the 5%..75% range of the progress bar.
            progress_placeholder.progress(0.05 + (0.7 * (index / total)))
            status_placeholder.info(f"正在处理视频片段 {ordinal}/{total}...")

            audio_file = str(output_dir / f"{stem}.mp3")
            csv_file = str(output_dir / f"{stem}.csv")
            lrc_file = str(output_dir / f"{stem}.lrc")

            # Only segments that were processed successfully are merged later.
            if self._process_video_segment(segment_path, audio_file, csv_file, lrc_file, stem):
                finished_csvs.append(csv_file)
                finished_lrcs.append(lrc_file)

        if not finished_csvs:
            status_placeholder.error("没有成功处理的视频片段")
            return False

        base_name = video_path_obj.stem

        print("\n合并所有CSV结果...")
        status_placeholder.info("正在合并处理结果...")
        progress_placeholder.progress(0.85)

        merged_csv_path = output_dir / f"{base_name}.csv"
        csv_merged = self.video_processor.merge_csv_results(finished_csvs, str(merged_csv_path))
        if not csv_merged:
            status_placeholder.error("合并CSV结果失败")
            return False

        print("\n合并所有LRC文件...")
        merged_lrc_path = output_dir / f"{base_name}.lrc"
        lrc_merged = self.video_processor._merge_lrc_files(finished_lrcs, str(merged_lrc_path), base_name)
        if not lrc_merged:
            status_placeholder.error("合并LRC文件失败")
            return False

        progress_placeholder.progress(1.0)

        # Report where the results were written.
        print(f"\nLRC字幕文件已生成: {lrc_path}")
        print(f"视频文件已保存: {video_path_output}")
        result_placeholder.markdown(f"LRC字幕文件已生成: `{lrc_path}`")
        result_placeholder.markdown(f"视频文件已保存: `{video_path_output}`")

        print(f"\n视频处理完成! LRC字幕文件保存在: {lrc_path}")
        status_placeholder.success("视频处理完成! LRC字幕文件和视频文件已保存到输出目录")
        return True

    def _process_short_video(self, video_path, output_dir, video_path_obj, audio_path, csv_path, lrc_path,
                             video_path_output, progress_placeholder, status_placeholder, result_placeholder):
        """Process a video in one pass: extract audio, transcribe it, write the LRC file.

        The extracted audio is uploaded to OSS under ``temp_audio/`` so that
        the Aliyun ASR service can read it by URL. The uploaded object is
        deleted again after transcription, even when transcription raises.

        Args:
            video_path: Path of the source video file.
            output_dir: Output directory.
            video_path_obj: ``Path`` object of the source video.
            audio_path: Path of the audio file to create.
            csv_path: Path of the transcription CSV to create.
            lrc_path: Path of the LRC file to create.
            video_path_output: Path of the copied video shown in the result messages.
            progress_placeholder: Progress bar placeholder.
            status_placeholder: Status message placeholder.
            result_placeholder: Result display placeholder.

        Returns:
            bool: True on success, False when any step fails.
        """
        # 1. Extract the audio track.
        print(f"开始处理文件: {video_path_obj.name}")
        print("开始提取音频...")
        progress_placeholder.progress(0.1)
        status_placeholder.info("正在提取音频...")

        if not self.video_processor.extract_audio(str(video_path), str(audio_path)):
            status_placeholder.error("音频提取失败")
            return False

        progress_placeholder.progress(0.3)

        # 2. Speech to text through the Aliyun ASR service.
        print("\n开始将音频转换为文本...")
        status_placeholder.info("正在将音频转换为文本...")

        asr = AliyunASR()
        if not asr.available:
            print("阿里云ASR服务不可用，请检查凭证设置")
            status_placeholder.error("阿里云ASR服务不可用，请检查凭证设置")
            return False

        # The ASR service is given a URL, so OSS storage is mandatory.
        if not self.oss_manager.available:
            print("使用阿里云听悟API需要启用OSS存储")
            status_placeholder.error("使用阿里云听悟API需要启用OSS存储")
            return False

        oss_path = f"temp_audio/{video_path_obj.stem}.mp3"
        file_url = self.oss_manager.upload_file(str(audio_path), oss_path)
        if not file_url:
            print("上传音频文件到OSS失败")
            status_placeholder.error("上传音频文件到OSS失败")
            return False

        try:
            progress_placeholder.progress(0.5)
            status_placeholder.info("正在使用阿里云听悟API转录音频...")
            success, _ = asr.transcribe_file(file_url, str(csv_path))
        finally:
            # Always remove the temporary upload, including when transcription
            # raises, so no audio is left behind on OSS.
            self.oss_manager.delete_file(oss_path)
            print(f"已从OSS删除临时音频文件: {oss_path}")

        if not success:
            status_placeholder.error("音频转文本失败")
            return False

        # 3. Convert the transcription CSV to LRC.
        print("\n开始将CSV转换为LRC格式...")
        status_placeholder.info("正在生成LRC字幕文件...")
        progress_placeholder.progress(0.7)

        if not self.video_processor._csv_to_lrc(str(csv_path), str(lrc_path), video_path_obj.stem):
            status_placeholder.error("CSV转LRC失败")
            return False

        progress_placeholder.progress(0.9)

        # Report where the results were written.
        print(f"\nLRC字幕文件已生成: {lrc_path}")
        print(f"视频文件已保存: {video_path_output}")
        result_placeholder.markdown(f"LRC字幕文件已生成: `{lrc_path}`")
        result_placeholder.markdown(f"视频文件已保存: `{video_path_output}`")

        progress_placeholder.progress(1.0)
        print(f"\n视频处理完成! LRC字幕文件保存在: {lrc_path}")
        status_placeholder.success("视频处理完成! LRC字幕文件和视频文件已保存到输出目录")
        return True

    def _process_video_segment(self, segment_path, segment_audio_path, segment_csv_path, segment_lrc_path,
                               segment_name):
        """Process one video segment: extract audio, transcribe it, write the LRC file.

        The extracted audio is uploaded to OSS under ``temp_audio/`` so that
        the Aliyun ASR service can read it by URL. The uploaded object is
        deleted again after transcription, even when transcription raises.

        Args:
            segment_path: Path of the segment video.
            segment_audio_path: Path of the audio file to create.
            segment_csv_path: Path of the transcription CSV to create.
            segment_lrc_path: Path of the LRC file to create.
            segment_name: Segment name, used in messages and the OSS key.

        Returns:
            bool: True when the segment was processed successfully.
        """
        # 1. Extract the audio track.
        print("提取音频...")
        if not self.video_processor.extract_audio(segment_path, segment_audio_path):
            print(f"片段 {segment_name} 音频提取失败")
            return False

        # 2. Speech to text through the Aliyun ASR service.
        print("将音频转换为文本...")

        asr = AliyunASR()
        if not asr.available:
            print("阿里云ASR服务不可用，请检查凭证设置")
            return False

        # The ASR service is given a URL, so OSS storage is mandatory.
        if not self.oss_manager.available:
            print("使用阿里云听悟API需要启用OSS存储")
            return False

        oss_path = f"temp_audio/{segment_name}.mp3"
        file_url = self.oss_manager.upload_file(segment_audio_path, oss_path)
        if not file_url:
            print(f"上传音频文件 {segment_name} 到OSS失败")
            return False

        try:
            success, _ = asr.transcribe_file(file_url, segment_csv_path)
        finally:
            # Always remove the temporary upload, including when transcription
            # raises, so no audio is left behind on OSS.
            self.oss_manager.delete_file(oss_path)
            print(f"已从OSS删除临时音频文件: {oss_path}")

        if not success:
            print(f"片段 {segment_name} 音频转文本失败")
            return False

        # 3. Convert the transcription CSV to LRC.
        print("将CSV转换为LRC格式...")
        if not self.video_processor._csv_to_lrc(segment_csv_path, segment_lrc_path, segment_name):
            print(f"片段 {segment_name} CSV转LRC失败")
            return False

        return True

    def process_video_with_subtitle(self, video_path, subtitle_path, selected_ip, logger,
                                    progress_placeholder, status_placeholder, result_placeholder):
        """Extract frames and audio clips from a video according to a subtitle file.

        While this method runs, ``sys.stdout`` is replaced by ``logger`` so
        that ``print`` output reaches the UI; it is restored on exit. Results
        are written below ``self.output_dir``: frames under
        ``images/original/<ip_id>``, audio clips under
        ``audio/original/<ip_id>`` and the subtitle table as
        ``<ip_id>/subtitles_images.csv``.

        Args:
            video_path: Path of the video file.
            subtitle_path: Path of the subtitle file.
            selected_ip: IP ID to store the results under. ``'none'`` means
                the IP is looked up by the video's file stem in
                ``ip_metadata.csv`` and created when no entry has that name.
            logger: File-like object that temporarily replaces ``sys.stdout``.
            progress_placeholder: Progress bar placeholder.
            status_placeholder: Status message placeholder.
            result_placeholder: Result display placeholder.

        Returns:
            bool: True on success, False when a step fails or an exception
            is raised.
        """
        # Redirect standard output to the UI logger.
        original_stdout = sys.stdout
        sys.stdout = logger

        try:
            video_path_obj = Path(video_path)
            subtitle_path_obj = Path(subtitle_path)

            if selected_ip == 'none':
                ip_name = video_path_obj.stem
                ip_metadata_path = os.path.join(self.output_dir, "ip_metadata.csv")

                # Reuse an existing IP only on an exact name match. A
                # substring/regex test (str.contains) could report a hit for
                # which the equality lookup then finds no row.
                existing_ids = None
                if os.path.exists(ip_metadata_path):
                    ip_df = pd.read_csv(ip_metadata_path)
                    existing_ids = ip_df.loc[ip_df['ip_name'].astype(str) == ip_name, 'ip_id']

                if existing_ids is not None and not existing_ids.empty:
                    selected_ip = existing_ids.iloc[0]
                else:
                    # Unknown name (or no metadata file yet): register a new IP.
                    selected_ip = f"IP{uuid.uuid4().hex[:8]}"
                    self.video_processor.create_ip_metadata(ip_name, selected_ip, ip_metadata_path)

            image_output_dir = Path(os.path.join(self.output_dir, 'images', 'original', selected_ip))
            os.makedirs(image_output_dir, exist_ok=True)

            audio_output_dir = Path(os.path.join(self.output_dir, 'audio', 'original', selected_ip))
            os.makedirs(audio_output_dir, exist_ok=True)

            # 1. Convert the subtitle file into a DataFrame.
            print(f"开始处理文件: {video_path_obj.name} 和 {subtitle_path_obj.name}")
            progress_placeholder.progress(0.1)

            subtitle_df = convert_subtitle_to_dataframe(subtitle_path)
            # Check the result before touching it: the converter returns None
            # on failure, and a file without any cue leaves nothing to extract.
            if subtitle_df is None or subtitle_df.empty:
                status_placeholder.error("字幕文件转换失败")
                return False
            subtitle_df['ip_id'] = selected_ip

            progress_placeholder.progress(0.3)

            # 2. Extract one video frame per subtitle row.
            print("\n开始根据字幕提取视频帧...")
            status_placeholder.info("正在根据字幕提取视频帧...")

            frame_progress = st.progress(0)

            def frame_update_progress(progress):
                frame_progress.progress(progress)

            if not self.video_processor.extract_frames_from_dataframe(str(video_path), subtitle_df,
                                                                      str(image_output_dir), selected_ip,
                                                                      progress_callback=frame_update_progress):
                status_placeholder.error("视频帧提取失败")
                return False

            print(f"\n抽帧处理完成! 结果保存在: {image_output_dir}")
            progress_placeholder.progress(0.6)

            # 3. Extract the audio clips straight from the DataFrame.
            print("\n开始提取音频片段...")
            status_placeholder.info("正在提取音频片段...")

            audio_progress = st.progress(0)

            def update_audio_progress(progress):
                audio_progress.progress(progress)

            # A failure here is logged but does not abort the run.
            if not self.video_processor.extract_audio_clips_from_dataframe(str(video_path), subtitle_df,
                                                                           str(audio_output_dir), selected_ip,
                                                                           progress_callback=update_audio_progress):
                print("音频片段提取失败，但继续处理")

            progress_placeholder.progress(0.9)

            # 4. Save the subtitle table next to the IP dataset.
            os.makedirs(os.path.join(self.output_dir, selected_ip), exist_ok=True)
            subtitles_images_path = os.path.join(self.output_dir, selected_ip, "subtitles_images.csv")
            try:
                subtitle_df.to_csv(subtitles_images_path, index=False)
                print(f"成功创建字幕与图片关联文件: {subtitles_images_path}")
            except Exception as e:
                print(f"创建字幕与图片关联文件失败: {e}")

            progress_placeholder.progress(1.0)
            status_placeholder.success("处理完成! 结果已保存到输出目录")
            return True
        except Exception as e:
            print(f"处理过程中出现错误: {e}")
            status_placeholder.error(f"处理错误: {e}")
            return False
        finally:
            # Restore standard output.
            sys.stdout = original_stdout

    def generate_word_dataset(self, word_list_path, selected_ip, logger, status_placeholder, result_placeholder):
        """Generate the word dataset of an existing IP dataset.

        While this method runs, ``sys.stdout`` is replaced by ``logger`` so
        that ``print`` output reaches the UI; it is restored on exit.

        Args:
            word_list_path: Path of a UTF-8 text file with one word per line.
            selected_ip: IP ID; its folder must exist under ``self.output_dir``.
            logger: File-like object that temporarily replaces ``sys.stdout``.
            status_placeholder: Status message placeholder.
            result_placeholder: Result display placeholder (not used here).

        Returns:
            bool: True when the generator reports success; False when the IP
            folder is missing, the word list is empty, generation fails or an
            exception is raised.
        """
        # Redirect standard output to the UI logger.
        saved_stdout = sys.stdout
        sys.stdout = logger

        try:
            ip_dir = os.path.join(self.output_dir, selected_ip)
            if not os.path.isdir(ip_dir):
                print(f"IP数据集目录不存在: {ip_dir}")
                status_placeholder.error(f"IP数据集目录不存在: {selected_ip}")
                return False

            # The words are read only to validate the list and report its
            # size; the generator below receives the file path itself.
            with open(word_list_path, 'r', encoding='utf-8') as handle:
                stripped_lines = (raw.strip() for raw in handle)
                words = [word for word in stripped_lines if word]

            word_count = len(words)
            if word_count == 0:
                print("单词列表为空")
                status_placeholder.error("单词列表为空")
                return False

            print(f"读取到 {word_count} 个单词")
            status_placeholder.info(f"正在为 {word_count} 个单词生成数据集...")

            # The bare name resolves to the module-level function, not to
            # this method.
            if generate_word_dataset(word_list_path, ip_dir):
                return True

            print("单词数据集生成失败")
            status_placeholder.error("单词数据集生成失败")
            return False
        except Exception as err:
            print(f"生成单词数据集过程中出现错误: {err}")
            status_placeholder.error(f"处理错误: {err}")
            return False
        finally:
            # Restore standard output.
            sys.stdout = saved_stdout
